import re
import time

import requests
from lxml import html, etree

# Page whose HTML is fetched and scraped below.
url = 'https://www.4399.com/'
# Browser-like request headers kept for reference only: the request below is
# sent WITHOUT them. To use them, uncomment the dict and pass headers=headers.
# headers = {
#     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
#     'Cookie': '_4399stats_vid=17101453199871958; UM_distinctid=18e2c9ab48d32f-033fde1f44b279-26001b51-1fa400-18e2c9ab48e9c0; gdc_userMark=u31ws87sP41VJ-14Fq21Tc71ZH7-1io26ma32Dw58-Vc45og37Ro24X; gdc_webRecordId=a2ecdfd1-4adc54-588019; gdc_newStatCid=3001; gdc_newStatOid1=12728; webRecordIdP=26qts3th-4cg710-9j5n7; global_hs=4399.com%7C%7C%7C4399%u4EE5%u592A%u4E4B%u5149%7C%7Cnews.4399.com/ytzg/%7C%7C0; _gprp_c=""; cookie_hs=4399.com%7C%7C%7C%u529F%u592B%u4F20%u5947%7C%7C239936_4%7C%7C0%7C%7C%7C4399%u5F39%u5F39%u5802%7C%7C12439%7C%7C0; home4399=yes; CNZZDATA30039538=cnzz_eid%3D1532837303-1710145320-%26ntime%3D1715857289; Hm_lvt_334aca66d28b3b338a76075366b2b9e8=1714111626,1715607414,1715857289; Hm_lpvt_334aca66d28b3b338a76075366b2b9e8=1715857289',
#     'Referer': 'https://www.so.com/link?m=w9klPSmueqPeAYj7pkjdqFKGEHhLV4RxuP%2FaoXB20%2BLeT6EI1vu7zkfV0mYJNlb91jPuwVs%2BpXtqkwi7e%2Bx4g8xOqBp6A82CVqn0EbQh9WvLkgEHrG3S92%2FroJK1Yz1WR3CaqtQXVg5o%2FjSE8fU5pY2CznHcNtkXB'
# }
# requests has no default timeout; without one a stalled connection would
# block this script forever.
response = requests.get(url, timeout=10)
# Fail loudly on a 4xx/5xx answer instead of scraping an error page.
response.raise_for_status()
# The raw body is decoded as GBK; `res` holds the page text (str) from here on.
res = response.content.decode("gbk")
# print(res)

# Earlier lxml-based attempts (XPath / CSS selector), kept for reference.
# html_tree = etree.HTML(res)
# titles = html_tree.xpath(
#     "//div[@class='middle_3 cf']/div[@class='box_c']/div[@class='tm_fun h_3']/ul[@class='tm_list']/li/a")
# titles = html_tree.cssselect(".middle_3 .box_c .tm_fun ul li a")
# print(titles)

# Extract every <li><a ...><img .../>text</a></li> entry from the page text.
# The pattern has two capture groups, so re.findall returns a list of
# (lz_src attribute value, link text) tuples.
# Raw string literals are used (the pattern has no placeholders, so an
# f-string prefix was misleading); the two adjacent literals concatenate to
# the exact same pattern text, including the double spaces inside the <a> tag.
datas = re.findall(
    r'<li><a  onclick="my_trace.*?;return true;"  href=".*?">'
    r'<img lzimg="1" lz_src="(.*?)" alt=".*?" />(.*?)</a></li>',
    res)
# print(datas)

# for data in datas:
#     img_url = "https:" + data[0]
#     img_res = requests.get(img_url)
#     with open(f'../datas/4399/{data[1]}.jpg', 'wb') as f:
#         f.write(img_res.content)
#     print(f'{data[1]}爬取成功')
#     time.sleep(2)
